# Import necessary libraries
import requests
import pandas as pd
import time
import requests_cache
import re
from bs4 import BeautifulSoup
import datetime
import itertools
import threading
import plotly.express as px
import altair as alt
# Shared HTTP cache so repeated API calls (e.g. on notebook re-runs) are
# served from disk instead of re-hitting the AirNow API.
session = requests_cache.CachedSession('project_cache')

# Cities of interest and their most central zip codes (index-aligned tuples).
city_list = ('Youngstown', 'Pittsburgh', 'Washington DC', 'Dover',
             'Philadelphia', 'New York City', 'Baltimore')
zipcodes = ('44413', '15201', '20059', '19901', '19148', '10001', '21201')

K = 30  # window length in days for each observation period

# Dates for the 30 days AFTER the derailment (2023-02-03).
start_date = datetime.datetime.strptime("2023-02-03", "%Y-%m-%d")
date_generated = pd.date_range(start_date, periods=K)
month_after_dates = list(date_generated.strftime("%Y-%m-%d"))

# Dates for the 30 days BEFORE the derailment.
# NOTE(review): this window (2023-01-05 .. 2023-02-03) includes the
# derailment day itself and overlaps the "after" window's first day;
# starting at 2023-01-04 would make the two periods disjoint — confirm intent.
start_date = datetime.datetime.strptime("2023-01-05", "%Y-%m-%d")
date_generated = pd.date_range(start_date, periods=K)
month_before_dates = list(date_generated.strftime("%Y-%m-%d"))
# Fetch one day of raw observations for a single zip code
def get_data(zipcode, date):
    """Fetch one day of historical AQI observations for a zip code.

    Parameters
    ----------
    zipcode : str
        5-digit US zip code to query.
    date : str
        Date in "YYYY-MM-DD" form.

    Returns
    -------
    str
        Raw XML response body from the AirNow API.
    """
    # SECURITY NOTE(review): the API key is hard-coded and committed with the
    # script; move it to an environment variable and rotate the key.
    params = {
        "format": "application/xml",
        "zipCode": zipcode,
        # AirNow's historical endpoint expects "<date>T<hour>-<tz offset>".
        "date": f"{date}T00-0000",
        "distance": "25",
        "api_key": "6DF75E4D-6FD6-4147-80D4-823042F30E27",
    }
    # Use the module-level cached session (created at file top) — the original
    # called requests.get directly, which silently bypassed the cache.
    response = session.get(
        "https://www.airnowapi.org/aq/observation/zipCode/historical/",
        params=params,
        timeout=30,
    )
    return response.text
# Download and parse a whole date range for one zip code
def fetch_and_process_data(zipcode, dates):
    """Download and parse AQI observations for one zip code over many dates.

    Parameters
    ----------
    zipcode : str
        5-digit US zip code to query.
    dates : list[str]
        Dates in "YYYY-MM-DD" form; one API request is made per date.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``AQI``, ``Parameter``, ``Location`` — one row per
        observation. AQI values are still strings here; the caller converts
        them to int.
    """
    # Concatenate each day's raw XML into one string; join() avoids the
    # quadratic cost of repeated string +=. (The original also printed the
    # entire payload — dropped as debug noise.)
    raw_xml = "".join(get_data(zipcode, day) for day in dates)
    # html.parser lower-cases tag names, hence the lowercase tag searches.
    soup = BeautifulSoup(raw_xml, "html.parser")
    # Each field is re-extracted from the stringified tag list with a regex,
    # mirroring the original parsing exactly.
    parameter_values = re.findall(r'<parametername>(.*?)<\/parametername>',
                                  str(soup.find_all('parametername')))
    aqi_values = re.findall(r'<aqi>(\d+)</aqi>',
                            str(soup.find_all('aqi')))
    date_values = re.findall(r'<dateobserved>(\d{2}/\d{2}/\d{4})',
                             str(soup.find_all('dateobserved')))
    area_values = re.findall(r'<reportingarea>(.*?)</reportingarea>',
                             str(soup.find_all('reportingarea')))
    # Compile all fields into a single frame. (The original built the frame
    # twice back-to-back; the first construction was dead code.)
    return pd.DataFrame({'Date': date_values,
                         'AQI': aqi_values,
                         'Parameter': parameter_values,
                         'Location': area_values})
# Retrieve before/after data for all 7 locations.
def _fetch_location(zipcode):
    """Return (before, after) DataFrames for a single zip code."""
    return (fetch_and_process_data(zipcode, month_before_dates),
            fetch_and_process_data(zipcode, month_after_dates))

# One (before, after) pair per city; variable names are kept because the
# concatenation step below refers to them individually.
youngstown_before, youngstown_after = _fetch_location(zipcodes[0])
pittsburgh_before, pittsburgh_after = _fetch_location(zipcodes[1])
DC_before, DC_after = _fetch_location(zipcodes[2])
dover_before, dover_after = _fetch_location(zipcodes[3])
philadelphia_before, philadelphia_after = _fetch_location(zipcodes[4])
newyork_before, newyork_after = _fetch_location(zipcodes[5])
baltimore_before, baltimore_after = _fetch_location(zipcodes[6])
# Stack every location/period frame into one dataset, one row per observation.
compiled_data = pd.concat([youngstown_before, youngstown_after,
                           pittsburgh_before, pittsburgh_after,
                           DC_before, DC_after,
                           dover_before, dover_after,
                           philadelphia_before, philadelphia_after,
                           newyork_before, newyork_after,
                           baltimore_before, baltimore_after],
                          axis=0)
# reset_index(drop=True) already produces a unique 0..n-1 RangeIndex, so the
# original's follow-up reindex over range(len(...)) was a no-op and is dropped.
compiled_data_new_index = compiled_data.reset_index(drop=True)
# The API returns AQI as text; convert to int so sorting/plotting is numeric.
compiled_data_new_index['AQI'] = compiled_data_new_index['AQI'].astype(int)
# --- Per-pollutant subsets, ordered chronologically for plotting -----------
# NOTE(review): each subset is sorted by AQI and then re-sorted by Date.
# pandas' default sort kind is not guaranteed stable, so the AQI pre-sort
# at most affects tie order within a date — confirm whether it is needed.
# Extract PM2.5 rows
row_pm25 = compiled_data_new_index.loc[compiled_data_new_index['Parameter'] == 'PM2.5']
# Sort by AQI (see stability note above)
pm25_sorted_aqi = row_pm25.sort_values('AQI')
# Parse the Date strings in place on the intermediate, then order by date
pm25_sorted_aqi['Date'] = pd.to_datetime(pm25_sorted_aqi['Date'])
pm25_sorted_dates = pm25_sorted_aqi.sort_values('Date')
# Extract PM10 rows
row_pm10 = compiled_data_new_index.loc[compiled_data_new_index['Parameter'] == 'PM10']
# Sort by AQI
pm10_sorted_aqi = row_pm10.sort_values('AQI')
# Parse dates in place, then order by date
pm10_sorted_aqi['Date'] = pd.to_datetime(pm10_sorted_aqi['Date'])
pm10_sorted_dates = pm10_sorted_aqi.sort_values('Date')
# Extract Ozone rows
row_ozone = compiled_data_new_index.loc[compiled_data_new_index['Parameter'] == 'OZONE']
# Sort by AQI
ozone_sorted_aqi = row_ozone.sort_values('AQI')
# Parse dates in place, then order by date
ozone_sorted_aqi['Date'] = pd.to_datetime(ozone_sorted_aqi['Date'])
ozone_sorted_dates = ozone_sorted_aqi.sort_values('Date')
# Full dataset (all parameters) sorted by AQI, then chronologically
df_sorted_aqi = compiled_data_new_index.sort_values('AQI')
# Parse dates in place, then order by date
df_sorted_aqi['Date'] = pd.to_datetime(df_sorted_aqi['Date'])
df_sorted_dates = df_sorted_aqi.sort_values('Date')
# Plot PM2.5 AQI over time, one colored/symboled line per city
fig_pm25 = px.line(pm25_sorted_dates, x="Date", y="AQI", symbol="Location", color = "Location", title = 'PM2.5')
# NOTE(review): textposition applies to text labels; these traces set no
# text, so this call looks like a no-op — confirm it is intentional.
fig_pm25.update_traces(textposition="bottom right")
fig_pm25.show()
# Plot Ozone AQI over time (original comment incorrectly said PM10)
fig_ozone = px.line(ozone_sorted_dates, x="Date", y="AQI", symbol="Location", color = "Location", title = 'Ozone')
fig_ozone.update_traces(textposition="bottom right")
fig_ozone.show()
# Plot PM10 AQI over time
fig_pm10 = px.line(pm10_sorted_dates, x="Date", y="AQI", symbol="Location", color = "Location", title = 'PM10')
fig_pm10.update_traces(textposition="bottom right")
fig_pm10.show()